import requests
import os
import re

def search(page, req_head, domain=None, max_retries=5, timeout=10):
    """Collect subdomains of a domain from Baidu ``inurl:`` search results.

    Args:
        page: Number of result pages to scan (pages ``0 .. page - 1``).
        req_head: Dict of HTTP headers sent with every request.
        domain: Domain whose subdomains are collected. When omitted, the
            module-level ``key`` variable is used, which keeps existing
            ``search(page, req_head)`` callers working.
        max_retries: Maximum number of additional requests per page when a
            response yields no matching subdomain.
        timeout: Per-request timeout in seconds.

    Returns:
        Sorted list of the distinct subdomains found.
    """
    if domain is None:
        # Backward compatibility: fall back to the script-level global.
        domain = key

    # One pattern for both the first attempt and the retries, derived from
    # the scanned domain (escaped so that "." only matches a literal dot).
    pattern = re.compile(r"(\w+\." + re.escape(domain) + r")")

    def fetch(url):
        """Fetch *url* and return the subdomains found in the response."""
        try:
            body = requests.get(url, headers=req_head, timeout=timeout).content
        except requests.RequestException as exc:
            # A network failure counts as a failed attempt, not a crash.
            print("[-] 请求失败：{0}".format(exc))
            return []
        return pattern.findall(body.decode("utf8", errors="replace"))

    sites = set()
    for i in range(page):
        # Baidu paginates by result offset in steps of ten: "pn=<i>0".
        url = "https://www.baidu.com/s?wd=inurl:{0}&pn={1}0".format(domain, i)
        print("[+] 正在扫描第{0}页，url为：{1}".format(i, url))
        subdomains = fetch(url)

        # An empty result is treated as an anti-crawler block and retried,
        # but only a bounded number of times: a page may legitimately hold
        # no matches, which would otherwise loop forever.
        attempts = 0
        while not subdomains and attempts < max_retries:
            print("[-] 被反爬虫检测到，尝试重新访问")
            subdomains = fetch(url)
            attempts += 1

        if not subdomains:
            print("[-] 第{0}页未搜集到子域名，已跳过".format(i))
            continue

        print("[+] 访问成功搜集到子域名：", subdomains)
        sites.update(subdomains)

    site = sorted(sites)  # de-duplicated via the set, sorted for stable output
    print("The number of sites is %d" % len(site))
    return site


if __name__ == '__main__':
    # Script entry point: ask for a domain, scan 50 result pages and save
    # the collected subdomains next to this script.
    # ``key`` must stay a module-level name because search() reads it.
    key = input("请输入要扫描的域名：").strip()
    head = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.130 Safari/537.36'}
    site = search(50, req_head=head)

    # Resolve the script directory from an absolute path; a bare relative
    # ``__file__`` has an empty dirname, which would turn the target into
    # "/<name>" (the filesystem root).
    script_dir = os.path.dirname(os.path.abspath(__file__))
    file_path = os.path.join(script_dir, "子域名扫描结果.txt")

    # The context manager guarantees the file is flushed and closed.
    with open(file_path, 'w', encoding='utf-8') as fp:
        for i in site:
            fp.write(i+"\n")
            print("[+] 正在写入扫描结果："+i)
    print("[+] 扫描任务结束，结果保存在当前目录下，文件名：子域名扫描结果.txt")